last updated: August 30 2021
These are the figures that we are targeting for submission to the Journal of Dental Resesarch.
Data were collected from 152 participants who were screened into 2 cohorts: the low flow cohort (N = 33) consisted of individuals with a presumptive diagnosis of the autoimmune disorder Sjögren’s Syndrome (SS) while the control cohort (N = 119) consisted of otherwise healthy controls. The complete set of inclusion and exclusion criteria for each cohort is provided as supplementary data (Supplementary Table 1).
Gender identity, race, and ethnicity did not significantly differ between the control and low flow cohorts (Chisquare-tests, p > 0.1) (Supplementary Table 2). The majority of subjects in both cohorts were White or Asian and did not identify as Hispanic or Latino. A substantial sex bias was seen in both cohorts: roughly 62% of the control cohort identified as female while 93% of the low flow cohort identified as female, consistent with the known sex bias in the occurrence of Sjogren Syndrome
the PD and BOP regressions need to be changed to ordinal or logistic regression since they are not close to normal and BOP is (0,1)
This is useful: http://pages.stat.wisc.edu/~yandell/R_for_data_sciences/curate/tidyverse.html
Flow rate for 39 came from Day 1 not the ESE; the missing birthdays were recovered from REDCap track changes since Ava and Danielle made undocumented deletions of the provided birthdays.
#load required packages
library("easypackages")
#define packages
libraries("tidyverse",
"phyloseq",
"genefilter",
"plyr",
"knitr",
"modelr",
"gridExtra",
"reshape2",
"purrr",
"stringr",
"ggrepel",
"kableExtra",
"tidyverse",
"cowplot",
"broom",
"ggplot2",
"RColorBrewer",
"viridis",
"ggpubr",
"dplyr",
"ggpmisc",
"ggfortify",
"FactoMineR",
"factoextra",
"colorspace",
"cowplot",
"png",
"sjPlot",
"bestNormalize",
"lmtest",
"effects",
"dabestr",
"vegan",
"DESeq2",
"caret",
'party',
'randomForest',
'rfPermute', '
rpart.plot',
'wesanderson')
set.seed(673)
ggplot2::theme_set(ggplot2::theme_classic())
https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/
#read in data about pids and cohort
pids <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/cohort_20191019.csv") #152 row
#read in age
age.df = read.csv("~/Dropbox/oral-clinical-data-analysis-data/age_df.csv") #152 rows
age.df$X = NULL
age.df= plyr::join(age.df, pids)
#read in flow rates
uwsfr = read.csv("~/Dropbox/oral-clinical-data-analysis-data/uwsfr_df.csv") #152 rows
uwsfr$X = NULL
uwsfr= plyr::join(uwsfr, pids)
#read the coordinates
map <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/mytoothdot_coordinates_FullMouth.csv")
map <- na.omit(map)
colnames(map)[4] <- "tooth"
map$X.SampleID = NULL
#read in the caries data
caries.data <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/caries_data_v3.0.csv")
caries.df = plyr::join(caries.data, uwsfr)
caries.df = plyr::join(caries.df, age.df)
caries_map = plyr::join(caries.df, map, by="tooth")
#read in the perio data and create a uwsfr subset
perio.data <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/periodontal_data_v2.0.csv")
perio.df = plyr::join(perio.data, uwsfr)
perio.df = plyr::join(perio.df, age.df)
perio_map = plyr::join(perio.df, map, by="tooth")
#read in an image of the mouth
#create color so that 0 is midpoint and set as white
#load and setup background mouth diagram
img <- readPNG("~/Dropbox/mouth_smaller.png")
g<- grid::rasterGrob(img, interpolate=TRUE, height = unit(.95,"npc"), width = unit(.9, "npc"))
#transformations
caries_map$DMFS_orderNorm =orderNorm(caries_map$DMFS.bytooth)$x.t
perio_map$cal_orderNorm = orderNorm(perio_map$subject.tooth.mean.cal)$x.t #normal
perio_map$pd_orderNorm = orderNorm(perio_map$subject.tooth.mean.pd)$x.t #normal
perio_map$gmcej_orderNorm =orderNorm(perio_map$subject.tooth.mean.gm.cej)$x.t #normal
perio_map$bop_orderNorm =orderNorm(perio_map$bop)$x.t
Figure 1: Gardner-Altman estimation plots reveal significant demographic and clinical differences between low flow and control cohorts. Data for control and low flow cohort patient Swarm plots of a) uws-fr, b) age, c) dmfs, d) cal and e) gm.cej, f) bop, and g) PD are plotted are plotted on the left-aligned axis. The right-aligned axis reveals the point estimate of the unpaired mean difference between groups, surrounded by their 95% confidence intervals (95% CI).
These data indicate that while probing depths are trend towards higher average PDs in the low flow cohort compared to the controls. BOP did not appear to differ between the two groups.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Figure1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#define a function to summarize the clinical data
summarize_clinical_feature <- function(df, vars) {
mydf <- df %>%
ungroup(.) %>%
dplyr::select(vars) %>%
unique(.) %>%
na.omit(.)
return(mydf)
}
#### Plot uws-fr
uwsfr$complete = complete.cases(uwsfr$uwsfr)
uwsfr$`UWS-FR (mL/min)` = uwsfr$uwsfr
uwsfr = subset(uwsfr, complete==TRUE)
uws.fr.plot <-
uwsfr %>%
dabest(cohort, `UWS-FR (mL/min)`,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1A = plot(uws.fr.plot, axes.title.fontsize = 12)
#get the mean uws-fr
#detach(package:plyr)
#out = uwsfr %>%
# group_by(cohort) %>%
# summarize(mean_uws = mean(uwsfr, na.rm = TRUE))
#out
#plot age
age.df$`Age (Years)` = age.df$age
age.df = subset(age.df, redcap !=268) # drop 268 since we don't have clinical data for this subject
age.plot <-
age.df %>%
dabest(cohort, `Age (Years)` ,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1B = plot(age.plot, axes.title.fontsize = 12)
#get the mean ages by cohort
#out = age.df %>%
# group_by(cohort) %>%
# summarize(mean_age = mean(age, na.rm = TRUE))
#out
age.sub = unique(age.df$redcap)
#get the proportion of controls who were over the age of 40 (N=23; 19.33%)
over40c = subset(age.df, age > 40 & cohort=="Control")
c = subset(age.df, cohort=="Control")
nrow(over40c)/nrow(c)
## [1] 0.1932773
#plot dmfs
caries_map <- filter(caries_map, tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
perio_map <- filter(perio_map, tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
#This subfigure compares overall DMFS score per subject between cohorts
dmfs.df <- summarize_clinical_feature(df=caries_map, vars=c("redcap", "aim", "DMFS_orderNorm", "cohort"))
dmfs.df2 <- summarize_clinical_feature(df=caries_map, vars=c("redcap", "aim", "DMFS.bysubject", "cohort"))
dmfs.df2$DMFS = dmfs.df2$DMFS.bysubject
dmfs.plot <-
dmfs.df2 %>%
dabest(cohort, DMFS,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
dmfs.sub = unique(dmfs.df2$redcap)
Fig1C = plot(dmfs.plot, axes.title.fontsize = 12)
#get the average dmfs by cohort
#out = dmfs.df2 %>%
# group_by(cohort) %>%
# summarize(mean_dmfs = mean(DMFS.bysubject, na.rm = TRUE))
#out
#it looks like there's a subset with low DMFS, the subset has fewer than 15; when we subset on these, they have a lower average age than the individuals in the upper group
dfms.low = subset(dmfs.df2, DMFS.bysubject < 15)
dfms.low.subjects = dfms.low$redcap
ages_with_low_dmfs = subset(age.df, redcap %in% dfms.low.subjects)
ages_with_higher_dmfs = subset(age.df, !(redcap %in% dfms.low.subjects))
#plot cal
cal.df = summarize_clinical_feature(perio_map, vars=c("redcap", "aim", "mean.cal", "cohort"))
cal.df$`Mean CAL` =cal.df$mean.cal
cal.plot <-
cal.df %>%
dabest(cohort, `Mean CAL`,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1D = plot(cal.plot, axes.title.fontsize = 12)
#CAL by group
#out = cal.df %>%
# group_by(cohort) %>%
# summarize(mean_cal = mean(mean.cal, na.rm = TRUE))
#out
#plot PD
pd.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "subject.mean.pd", "cohort"))
pd.df$`Mean PD` = pd.df$subject.mean.pd
pd.plot <-
pd.df %>%
dabest(cohort, `Mean PD`,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1E = plot(pd.plot, axes.title.fontsize = 12)
#plot gmcej between cohorts.
gmcej.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "subject.mean.gm.cej", "cohort"))
gmcej.df$`Mean GM-CEJ` = gmcej.df$subject.mean.gm.cej
gmcej.plot <-
gmcej.df %>%
dabest(cohort, `Mean GM-CEJ`,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1F = plot(gmcej.plot, axes.title.fontsize = 12)
#CAL by group
#out = gmcej.df %>%
# group_by(cohort) %>%
# summarize(mean_gm = mean(subject.mean.gm.cej, na.rm = TRUE))
#out
#plot bop
bop.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "percent.bop", "cohort"))
bop.df$`Percent BOP` = bop.df$percent.bop
bop.plot <-
bop.df %>%
dabest(cohort, `Percent BOP`,
idx = c("Control", "Low Flow"),
paired = FALSE)%>%
mean_diff()
Fig1G = plot(bop.plot, axes.title.fontsize = 12)
#bop by group
#out = bop.df %>%
# group_by(cohort) %>%
# summarize(mean_bop = mean(percent.bop, na.rm = TRUE))
#out
##### Make Figure 1
Figure1 <- cowplot::plot_grid(
Fig1A + theme(legend.position = "none", axis.text=element_text(size=12)),
Fig1B + theme(legend.position = "none"),
Fig1C + theme(legend.position = "none"),
Fig1D + theme(legend.position = "none"),
Fig1F + theme(legend.position = "none"),
Fig1G + theme(legend.position = "none"),
Fig1E + theme(legend.position = "none"),
labels = "auto", ncol = 2)
Figure1
ggsave(Figure1, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure1.png", width=10, height=12, units="in", dpi=300)
How many “control” subjects have flow rates less than 0.3?
#how many controls have flow rates less than 0.3
control_N0.3 = subset(uwsfr, aim %in% c(1, 2)) %>%
subset(., uwsfr <= 0.3) #35
#how many have flow rates less than 0.2
control_N0.2 = subset(uwsfr, aim %in% c(1, 2)) %>%
subset(., uwsfr <= 0.2) #18
control_N0.1 = subset(uwsfr, aim %in% c(1, 2)) %>%
subset(., uwsfr <= 0.1) #3
Note that these are drawn from the final version of the consent forms.

Chi-square tests indicate that the distribution of patients in each cohort didn’t differ with respect to gender, race, or ethnicity.
#{width=65%}
#read in the race data and perform chi-square tets
race.df = read.csv("~/Dropbox/oral-clinical-data-analysis-data/race_df.csv")[1:7,1:3]
chisq.test(race.df$Controls, race.df$Low.Flow)
##
## Pearson's Chi-squared test
##
## data: race.df$Controls and race.df$Low.Flow
## X-squared = 28, df = 24, p-value = 0.26
#read in ethnicity data and do chi-sq tests
ethnic.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/ethnicity_df.csv")[1:3,1:3]
chisq.test(ethnic.df$Control, ethnic.df$Low.Flow)
##
## Pearson's Chi-squared test
##
## data: ethnic.df$Control and ethnic.df$Low.Flow
## X-squared = 6, df = 4, p-value = 0.1991
#read in gender and do chi-square tetst
gender.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/gender_df.csv")[1:5,1:3]
chisq.test(gender.df$Control, gender.df$Low.Flow)
##
## Pearson's Chi-squared test
##
## data: gender.df$Control and gender.df$Low.Flow
## X-squared = 7.5, df = 6, p-value = 0.2771
A linear model was applied to data subsetted by universal tooth number excluding wisdom teeth (Teeth# 1,16,17,32). The response variables used were DMFS Score, CAL, PD, GM-CEJ, and BOP. The predictor variables used were Age, Cohort, UWS-FR, and Cohort:UWS-FR. The coefficients and confidence intervals for each tooth was plotted for each linear model.
Compared to those in the low flow cohort, control subjects had fewer caries at several anterior teeth in the maxilla (teeth 6, 8, 11) and mandible (teeth 22, 23, 24, 25), as indicated by the negative regression coefficients (adj.p < 0.05). In addition, while both groups tended to have more caries at posterior sites low flow subjects tended to have significantly more at several maxillary (3, 5, 14) and mandibular molars and pre-molars (18, 19, 21, 28, 30, 31) compared to the control, as indicated by the positive regression coefficients.
#For teeth 7, 8, 22, and 27 the error 0 or 1 probability occurred;
tooth.model = tooth.model[!(tooth.model %in% c(1, 16, 17, 32))]
caries.df <- dplyr::select(caries_map, c("DMFS.bytooth", "cohort", "age", "uwsfr", "tooth")) %>%
filter(., !(tooth %in% c(1, 16, 17, 32) ))
keep = dplyr::select(caries.df, c("DMFS.bytooth",
"tooth"))
caries.df$cohort = plyr:::revalue(caries.df$cohort, c("Control"=0, "Low Flow"=1))
caries.df$cohort = as.numeric(as.character(caries.df$cohort))
caries.df.scaled = data.frame(scale(caries.df[,2:4], center=TRUE, scale=TRUE))
caries.df.scaled = data.frame(cbind(keep), caries.df.scaled)
caries.df.scaled$dmfs_orderNorm = orderNorm(caries.df.scaled$DMFS.bytooth)$x.t #normal
dmfs.model = 'dmfs_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
df = get_spltmap_out(df=caries.df.scaled,
myvector=as.vector(caries.df.scaled$tooth),
model=dmfs.model)
df = define_tooth_class(df)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#combine coefs and confs and filter out intercept
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the DMFS Model
Fig2A = plot_age(df, model="DMFS")
age.sig = subset(df, p.adj < 0.05 & term=="Age") # age impacts all teeth
age.sig$Significant = ifelse(age.sig$p.value <0.05, "Yes","No")
Fig2B = plot_cohort(df, model="DMFS")
dfms.sig = subset(df, p.adj < 0.05 & term=="cohort") # age cohort primarily affects the upper jaw! incisors/canines in lower jaw
#on average difference between anterior to posterior
ant = subset(df, tooth.class %in% c("Canine", "Central.Incisor", "Lateral.Incisor" ) & term=="cohort" )
post = subset(df, tooth.class %in% c("Molar", "Pre.Molar" ) & term=="cohort" )
mean(ant$estimate)/mean(post$estimate)
## [1] -0.4645941
neg = subset(dfms.sig, estimate < 0)
sigPos = subset(dfms.sig, p.adj < 0.05 & estimate >-0.04 )
Fig2C = plot_flow(df, model="DMFS")
uws.sig = subset(df,p.adj < 0.05 & term=="UWS-FR") # 11 teeth
Fig2D = plot_interaction(df, model="DMFS")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth
#
p = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
ggplot(., aes(uwsfr , DMFS.bysubject , color=as.factor(cohort)), group=as.factor(cohort)) +
geom_point() + geom_smooth(method = lm)
p1 = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
subset(., tooth %in% c(6, 8, 11, 22, 23, 24, 25)) %>%
ggplot(., aes(as.factor(tooth), DMFS_orderNorm, color=cohort, ncol=2)) + geom_boxplot()+
facet_wrap(~tooth, scales="free") +
stat_compare_means() + ggtitle("Negative Coef") + geom_point()
p2 = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
subset(., tooth %in% c(3, 5, 14, 18, 19, 21, 28, 30, 31)) %>%
ggplot(., aes(as.factor(tooth), DMFS_orderNorm, color=cohort)) + geom_boxplot()+
facet_wrap(~tooth, scales="free") +
stat_compare_means()+ ggtitle("Positive Coef")+ geom_point()
grid.arrange(p1, p2, ncol=2)
fm = subset(caries_map, tooth %in% c(3, 5, 14, 18, 19, 21, 28, 30, 31))
fm = subset(fm, cohort %in% c("Low Flow", "Control"))
fm1 = doBy::summaryBy(DMFS.bytooth~tooth+cohort, FUN=mean, data=fm)
p = ggplot(fm1, aes(cohort, DMFS.bytooth.mean)) + facet_wrap(~tooth) + geom_point()
fm = subset(caries_map, tooth %in% c(6, 8, 11, 22, 23, 24, 25))
fm = subset(fm, cohort %in% c("Low Flow", "Control"))
fm1 = doBy::summaryBy(DMFS.bytooth~tooth+cohort, FUN=mean, data=fm)
p2 = ggplot(fm1, aes(cohort, DMFS.bytooth.mean)) + facet_wrap(~tooth) + geom_point()
Set up CAL model
plot_cohort <- function(df, model){
p <- ggplot(filter(df, term == "Cohort"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
geom_point(size=3) +
geom_hline(yintercept = 0, color = "gray") +
geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
coord_flip() +
ggtitle(paste0(model, " ", "~ Cohort")) +
labs(x = "Universal Tooth Number", y = "Control <--> Sjogrens", color = "Tooth Class")
}
perio.df <- dplyr::select(perio_map, c("cal", "pd","gm.cej","bop",
"cohort", "age", "uwsfr", "tooth")) %>%
filter(., tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
#transformations
keep = dplyr::select(perio.df, c("cal",
"pd",
"gm.cej",
"bop",
"cohort",
"tooth"))
perio.df$cohort = plyr:::revalue(perio.df$cohort, c("Control"=0, "Low Flow"=1))
perio.df$cohort = as.numeric(as.character(perio.df$cohort))
perio.df.scaled = data.frame(scale(perio.df[,5:7], center=TRUE, scale=TRUE))
perio.df.scaled = data.frame(cbind(keep), perio.df.scaled)
perio.df.scaled$cal_orderNorm = orderNorm(perio.df.scaled$cal)$x.t #normal
cal.model = 'cal_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled,
myvector=as.vector(perio.df$tooth),
model=cal.model)
df = define_tooth_class(mycoefficients)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the CAL Model
Fig2E = plot_age(df, model="CAL")
age.sig = subset(df, p.adj < 0.05 & term=="Age") # age impacts all teeth
Fig2F = plot_cohort(df, model="CAL")
cal.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in lower jaw
antMax = subset(cal.sig, tooth %in% c(6, 7, 8, 9, 10, 11))
min(antMax$estimate)
## [1] 0.4170893
max(antMax$estimate)
## [1] 0.5641276
mean(antMax$estimate)
## [1] 0.4827225
antMan = subset(cal.sig, tooth %in% c(22, 23, 24, 25, 26, 27))
min(antMan$estimate)
## [1] 0.3941972
max(antMan$estimate)
## [1] 0.6943266
mean(antMan$estimate)
## [1] 0.5336167
postMax = subset(cal.sig, tooth %in% c(3, 4, 5, 12, 13, 14))
min(postMax$estimate)
## [1] 0.239865
max(postMax$estimate)
## [1] 0.4848494
mean(postMax$estimate)
## [1] 0.3407975
postMan = subset(cal.sig, tooth %in% c(20, 21, 30))
min(postMan$estimate)
## [1] 0.2638477
max(postMan$estimate)
## [1] 0.5714036
mean(postMan$estimate)
## [1] 0.3998269
Fig2G = plot_flow(df, model="CAL")
uws.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth
Fig2H = plot_interaction(df, model="CAL")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth
Let’s do probing depth
perio.df.scaled$pd_orderNorm = orderNorm(perio.df.scaled$pd)$x.t
pd.model = 'pd_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled,
myvector=as.vector(perio.df$tooth),
model=pd.model)
df = define_tooth_class(mycoefficients)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the PD Model
Fig2I = plot_age(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="Age") # age cohort primarily affects the upper jaw! incisors/canines in
Fig2J = plot_cohort(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in
Fig2K = plot_flow(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth
Fig2L = plot_interaction(df, model="PD")
Let’s do gm-cej
perio.df.scaled$gm.cej_orderNorm = orderNorm(perio.df.scaled$gm.cej)$x.t
pd.model = 'gm.cej_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled,
myvector=as.vector(perio.df$tooth),
model=pd.model)
df = define_tooth_class(mycoefficients)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the GM-CEJ Model
Fig2M = plot_age(df, model="GM-CEJ")
Fig2N = plot_cohort(df, model="GM-CEJ")
gm.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in
antMax = subset(gm.sig, tooth %in% c(7, 10, 12))
min(antMax$estimate)
## [1] 0.3390851
max(antMax$estimate)
## [1] 0.4351341
mean(antMax$estimate)
## [1] 0.3805562
antMan = subset(gm.sig, tooth %in% c(22, 23, 24, 25, 26, 27))
min(antMan$estimate)
## [1] 0.3065823
max(antMan$estimate)
## [1] 0.632668
mean(antMan$estimate)
## [1] 0.5081951
postMax = subset(gm.sig, tooth %in% c(3, 4, 5, 12, 13, 14))
min(postMax$estimate)
## [1] 0.4351341
max(postMax$estimate)
## [1] 0.4351341
mean(postMax$estimate)
## [1] 0.4351341
postMan = subset(gm.sig, tooth %in% c(20, 21, 30))
min(postMan$estimate)
## [1] 0.2404437
max(postMan$estimate)
## [1] 0.4538664
mean(postMan$estimate)
## [1] 0.347155
Fig2O = plot_flow(df, model="GM-CEJ")
gm.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth
Fig2P = plot_interaction(df, model="GM-CEJ")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth
let’s do bop
perio.df$bop_orderNorm =orderNorm(perio.df$bop)$x.t
bop.model = 'bop_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled,
myvector=as.vector(perio.df$tooth),
model=pd.model)
df = define_tooth_class(mycoefficients)
#combine coefs and confs and filter out intercept
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the BOP Model
Fig2Q = plot_age(df, model="BOP")
Fig2R = plot_cohort(df, model="BOP")
bop.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in
Fig2S = plot_flow(df, model="BOP")
bop.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth
Fig2T = plot_interaction(df, model="BOP")
p = ggplot(perio.df, aes(bop_orderNorm, gm.cej)) + geom_point() + facet_wrap(~tooth)
Figure 2: Explicit spatial modeling reveals independent effects of age and patient cohort on the spatial pattern of dental disease. Coefficients with uncertainty intervals are plotted for a series of per-tooth linear models where DMFS (a), CAL (b), and GM-CEJ (c) were regressed against age, patient cohort, UWS-FR and the interaction between UWS-FR and cohort.
#~~~~~~~~~~~~~~~~~~~~
fig2legend <- cowplot::get_legend(Fig2T)
Fig2 <- cowplot::plot_grid((Fig2A + theme(legend.position = "none")), #DMFS~Age
(Fig2E + theme(legend.position = "none")), #CAL-age
(Fig2M + theme(legend.position = "none")), #GM-CEJ
(Fig2Q + theme(legend.position = "none")), #BOP
(Fig2I + theme(legend.position = "none")),
(Fig2B + theme(legend.position = "none")), #DMFS~Cohort
(Fig2F + theme(legend.position = "none")), #CAL-cohort
(Fig2N + theme(legend.position = "none")), #GM-CEJ
(Fig2R + theme(legend.position = "none")), #BOP
(Fig2J + theme(legend.position = "none")),
(Fig2C + theme(legend.position = "none")), #DMFS~UWS-Fr
(Fig2G + theme(legend.position = "none")),#CAL
(Fig2O + theme(legend.position = "none")), #GM-CEJ
(Fig2S + theme(legend.position = "none")), #BOP
(Fig2K + theme(legend.position = "none")),
(Fig2D + theme(legend.position = "none")), #DMFS~interaction
(Fig2H + theme(legend.position = "none")), #CAL
(Fig2P + theme(legend.position = "none")), #GM-CEJ
(Fig2T + theme(legend.position = "none")), #BOP
(Fig2L + theme(legend.position = "none")),
ncol = 5, labels = c("a", "b", "c", "d",
"e", "f", "g", "h",
"i", "j", "k", "l",
"m", "n", "o", "p",
"q", "r", "s", "t"), hjust = -2.75, vjust = 1.5)
Figure2 <- cowplot::plot_grid(Fig2, fig2legend, nrow = 1, rel_widths = c(1,.3))
ggsave(Figure2, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure2.png", width = 15, height = 10, units ="in",dpi = 300, device = "png")
Figure2
Figure 3: Subgingival and supragingival communities from 3 control and 3 low flow subjects segregate by UWS-FR while age imperfectly separates supragingival communities. Principal coordinates analysis on bray Curtis dissimilarity segregated communities by UWS-FR (a) along the second coordinate which explained 12.8% of the variance. Samples clustered by age (b) for subgingival, but not supragingival sites.
library(wesanderson)
library(ggsci)
add_plot_elements <- function(x) {
theme_update() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
text = element_text(size=12),
axis.text.x = element_text(angle=0, hjust=1),
panel.spacing = unit(0, "lines"),
strip.background = element_blank(),
strip.placement = "outside")
}
pal <- wes_palette("Zissou1", 10, type = "continuous")
pal2 = wes_palette("Royal1", 5, "continuous")
buda.pal <-wes_palette("Cavalcanti1", 10, "continuous")
#read in the microbial data and create a unified mapping file that includes the clinical data
clin2microbe = read.csv("~/Dropbox/oral-clinical-data-analysis-data/clin2microbemap.csv")
illumina = readRDS("~/Dropbox/oral-clinical-data-analysis-data/periodontology2000_hypo.RDS")
illumina = subset_samples(illumina, Protocol=="Clinic")
old.map = data.frame(sample_data(illumina))
new_map =merge(old.map, clin2microbe)
new_map$Duplication = duplicated(new_map$X.SampleID)
new_map = subset(new_map, Duplication !="TRUE")
rownames(new_map) = new_map$X.SampleID
sample_data(illumina) = sample_data(new_map)
sample_data(illumina)$DMFS = sample_data(illumina)$DMFS.bytooth
sample_data(illumina)$Habitat_Class = ifelse(sample_data(illumina)$Habitat_Class=="Supra",
"Supragingival", "Subgingival")
# use phyloseq to ordinate the phyloseq object
ord = ordinate(illumina, method="PCoA", distance="bray")
sample_data(illumina)$Cohort = ifelse(sample_data(illumina)$Aim=="SA1", "Control", "Low Flow")
p1 = plot_ordination(illumina, ord, type="samples", color="uwsfr", shape="Cohort") +
scale_color_gradientn(colours = pal) +
facet_wrap(~Habitat_Class, scales="free") + theme_classic() + add_plot_elements()
p2 = plot_ordination(illumina, ord, type="samples", color="age", shape="Cohort") +
scale_color_viridis_c() +
facet_wrap(~Habitat_Class, scales="free")+ theme_classic() + add_plot_elements()
sample_data(illumina)$DMFS.bytooth = as.factor(sample_data(illumina)$DMFS.bytooth)
p3 = plot_ordination(illumina, ord, type="samples", color="DMFS.bytooth", shape="Cohort") +
scale_color_manual(values=buda.pal) +
facet_wrap(~Habitat_Class, scales="free") + theme_classic() + geom_point()+ add_plot_elements()
Figure3 <- cowplot::plot_grid((p1),
(p2),
(p3),
ncol = 1, labels = c("a", "b", "c"), hjust = -2.75, vjust = 1.5)
Figure3
ggsave(Figure3, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure3.png", width = 8, height = 10, units ="in",dpi = 300, device = "png")
how does this related to spatial patterning
ordDf = p2$data %>%
subset(., Axis.2 > 0) %>%
subset(., Aim=="SA1")
modOut = p2$data %>%
ggplot(., aes(Tooth_Class, Axis.1)) + facet_wrap(Cohort~Habitat_Class, scales="free_x") + geom_boxplot()+
stat_compare_means() +geom_point()
modOut
ggsave(modOut, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS3.png")
Reference: http://userweb.eng.gla.ac.uk/umer.ijaz/bioinformatics/ggplot2/CCA.R
library(ggvegan)
buda.pal <-wes_palette("Cavalcanti1", 10, "continuous")
#set an analysis up for CCA
phyCLR= transform_sample_counts(illumina, function(x) x/sum(x))
#phyCLR = transform_sample_counts(illumina, function(x) compositions::clr(x))
map1 = data.frame(sample_data(phyCLR)$Tooth_Class, sample_data(phyCLR)$uwsfr,
sample_data(phyCLR)$DMFS.bytooth,
sample_data(phyCLR)$subject.tooth.mean.pd, sample_data(phyCLR)$subject.tooth.mean.cal,
sample_data(phyCLR)$subject.tooth.mean.gm.cej, sample_data(phyCLR)$age )
Z=data.frame(sample_data(phyCLR)$Subject)
colnames(map1) = c("Class", "uws_fr", "dmfs", "pd", "cal", "gmcej", "age")
map1$Class = as.numeric(map1$Class)
subs = as.vector(sample_data(phyCLR)$Subject)
runs = as.vector(sample_data(phyCLR)$Pool_Name)
map1$uws_fr = abs(map1$uws_fr)
otus = as.matrix(otu_table(phyCLR))
#for some reason this wworks on tsp = tax_glom(phy, "Genus")
attach(map)
ord = cca(otus ~ map1$uws_fr+map1$dmfs+map1$pd+map1$cal+map1$gmcej+map1$age, Z=Z, Ssitenv=map1) # add
ord.out<-scores(ord,display=c("sp","wa","lc","bp","cn"))
#Extract site data first
df_sites<-data.frame(ord.out$sites,data.frame(sample_data(phyCLR)))
#Draw sites
p<-ggplot()
p<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort), size=2) +
theme_classic()
#Draw biplots
mupltiplier <- vegan:::ordiArrowMul(ord.out$biplot)
df_arrows<- ord.out$biplot*2
colnames(df_arrows)<-c("x","y")
df_arrows=as.data.frame(df_arrows)
foo = reshape2::colsplit(rownames(df_arrows), "([$])", c("junk", "variable"))
foo = plyr::revalue(as.factor(foo$variable), c("uws_fr"="UWS-FR",
"dmfs1"="DMFS1",
"dmfs2"="DMS2",
"pd"="PD",
"cal"="CAL",
"gmcej"="GM-CEJ"))
rownames(df_arrows) = foo
p<-p+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm")))
p<-p+geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows)))
# Draw species
df_species<- as.data.frame(ord.out$species)
colnames(df_species)<-c("x","y")
df_species$ASV = rownames(df_species)
tax = data.frame(phyCLR@tax_table@.Data, ASV=rownames(phyCLR@tax_table@.Data), Abundance=taxa_sums(phyCLR))
dfTax = plyr::join(tax, df_species)
dfTax <- dfTax[order(dfTax$Abundance, decreasing=TRUE), ]
dfTax$ASV_Number = paste0("ASV", 1:nrow(dfTax))
dfTax = dfTax[1:15,]
# Either choose text or points
p<-p+geom_point(data=dfTax,aes(x,y, color=Genus), size=3)+
scale_color_manual(values=c("#FFDEE6","#03919B",
"#FE9866", "#BCEAF6", "#B4CF68",
"#0A5947","#FCCEB2","#C17A2A","#EC5064"))
Fig3A<-p+geom_text(data=dfTax,aes(x,y, label=ASV_Number))
p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=DMFS.bytooth), size=2) +
theme_classic()
Fig3B<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
guides(colour=guide_legend(title="DMFS"))
p = ggplot()
Fig3C<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=subject.tooth.mean.cal), size=2) +
theme_classic() +
scale_color_gradientn(colours = pal) +
geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
guides(colour=guide_legend(title="Mean CAL"))
p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=Habitat_Class), size=2) +
theme_classic()
Fig3D<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows)))
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=Subject), size=2) +
theme_classic()
Fig3E<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
guides(colour=guide_legend(title="DMFS"))
p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=uwsfr), size=2) +
theme_classic()
Fig3F<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
scale_color_viridis()
p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=DMFS.bytooth ), size=2) +
theme_classic()
Fig3G<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
arrow = arrow(length = unit(0.2, "cm"))) +
geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows)))
grid.arrange(Fig3B,
Fig3E,
Fig3C,
Fig3A,
Fig3F,
Fig3G,ncol=2)
ggsave(
grid.arrange(Fig3B,
Fig3E,
Fig3C,
Fig3A, ncol=2), file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS4.png", width = 14, height = 10)
library(seqRFLP)
x = data.frame(select(dfTax, c("ASV_Number", "ASV")))
foo=dataframe2fas(x, file = "~/Dropbox/oral-clinical-data-analysis/top30.fasta")
library(vegan)
###do relative ab?undance transformation and see if the patterns are the same
illuminaRA = transform_sample_counts(illumina, function(x) x / sum(x) )
#replace the otu table with hellinger transformed data
otus = data.frame(otu_table(illuminaRA))
outs.h = decostand(otus, "hellinger")
otu_table(illuminaRA) = otu_table(outs.h, taxa_are_rows=FALSE)
#make a list of nmds objects
dist_methods_deco = c("binomial", "bray", "canberra", "euclidean", "gower", "jaccard", "kulczynski", "manhattan")
nmds_ideco.list <- vector("list", length(dist_methods_deco))
plot_ideco.list <- vector("list", length(dist_methods_deco))
for(i in 1:length(dist_methods_deco)){
iDist <- vegan::vegdist(outs.h, method=dist_methods_deco[i])
iMDS <- ordinate(illuminaRA, method="PCoA", distance=iDist, correction="lingoes")
p <- plot_ordination(illuminaRA, iMDS, color="Tooth_Class") + facet_wrap(Jaw_Quadrant~Tooth_Aspect)
ps <- p$data
ps$Distance <- dist_methods_deco[[i]]
plot_ideco.list[[i]] <- ps
nmds_ideco.list[[i]] <- iMDS
}
plot_ideco <- do.call("rbind", plot_ideco.list)
p1=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=uwsfr)) +
facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+
scale_color_gradientn(colours = pal) + ggtitle("UWSFR")
p2=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=age)) +
facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+
ggtitle("Age") + scale_color_viridis_c()
p3=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=DMFS.bytooth)) +
facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+
ggtitle("DMFS")+scale_color_manual(values=buda.pal)
grid.arrange(p1, p2, p3, ncol=3)
ggsave(grid.arrange(p1, p2, p3, ncol=3), file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS2.png", width = 12, height = 14, units ="in",dpi = 300)
This plot shows that dry_speak, dry_qol, dry_swallow are correlated together, dry_tongue, dry_throat, dry_palate are together, and dry_lips is alone. This suggests that these grouped variables vary together. These plots also show that PC1 vs PC2 separates the data primarily by cohort. This suggests that these variables may be used to possibly distinguish between cohort.
quest.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/dry_mouth_questionnaire_20190915.csv")
#add cohort variable and select only the VAS varaibles for input to a PCoA
quest.df$cohort <- ifelse(quest.df$aim == 3, "Sjögren's", "Control")
pca_in <- quest.df %>%
dplyr::select(., phq_dry_swallow:phq_dry_palate)
quest.df$cat = ifelse(quest.df$uwsfr < 0.1, "Lower", "Higher")
## Plot the PCoA
library("FactoMineR")
res.pca <- PCA(pca_in, graph = FALSE)
Figure5A = fviz_pca_biplot(res.pca,
col.ind = quest.df$cat, palette = "jco",
addEllipses = FALSE, label = "var",
col.var = "black", repel = FALSE,
legend.title = "Flow Rate Category")
ggsave(Figure5A, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure5a.png", device="png", height = 4, width = 8)
Supplementary figure S4: raw data underlying VSQ
# plot the raw data
vsq = melt(quest.df, id.vars = c("redcap", "aim", "uwsfr", "cohort", "X"))
vsq$value = as.numeric(as.character(vsq$value))
vsq = subset(vsq, variable != "cat")
FigureS4 = ggplot(vsq, aes(cohort, value, fill=cohort)) +
geom_boxplot() + facet_wrap(~variable, ncol=4) + ylab("VAS Rating (0-100)") + theme_classic()+
stat_compare_means()
FigureS4
ggsave(FigureS4, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS4.png", device="png", height = 6, width = 8)
Conditional inference tree was used to identify the variables that most robustly separated patients into groups based on low (UWS-FR < 0.1ml/min) and not low (UWS-FR > 0.1 ml/min) subgroups. This model accurately identified 84.6% (11/13) subjects as having salivary flow rates lower than 0.1 ml/min. Most subjects who indicated that low salivary flow negatively impacted their overall quality of life (VAS >=56) had flow rates of less than 0.1 ml/min. On the other hand, most subjects who rated a negligible impact of low flow on their quality of life as well as an absence of dryness on their lips had flow rates exceeding 0.1 ml/min.
library(rpart)
library(rpart.plot)
# CART model
latlontree = rpart(uwsfr ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate, data=quest.df, method="anova")
quest.df$cohort = as.factor(as.character(quest.df$cohort))
latlontree = rpart(cohort ~ phq_dry_swallow + uwsfr, data=quest.df)
library(party)
quest.df$cat = ifelse(quest.df$uwsfr < 0.1, "low", "not.low")
quest.df$cat = as.factor(quest.df$cat)
fit <- ctree(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr,
data=quest.df)
### extract terminal node ID, two ways
all.equal(predict(fit, type = "node"), where(fit))
## [1] TRUE
#get classification
table(predict(fit), quest.df$cat)
##
## low not.low
## low 11 2
## not.low 6 111
#estimated class probabilities
tr <- treeresponse(fit, newdata = quest.df)
tr = do.call(rbind, tr)
tr
## [,1] [,2]
## [1,] 0.0200000 0.9800000
## [2,] 0.0200000 0.9800000
## [3,] 0.0200000 0.9800000
## [4,] 0.0200000 0.9800000
## [5,] 0.0200000 0.9800000
## [6,] 0.0200000 0.9800000
## [7,] 0.0200000 0.9800000
## [8,] 0.8461538 0.1538462
## [9,] 0.0200000 0.9800000
## [10,] 0.0200000 0.9800000
## [11,] 0.2352941 0.7647059
## [12,] 0.0200000 0.9800000
## [13,] 0.0200000 0.9800000
## [14,] 0.2352941 0.7647059
## [15,] 0.0200000 0.9800000
## [16,] 0.2352941 0.7647059
## [17,] 0.0200000 0.9800000
## [18,] 0.2352941 0.7647059
## [19,] 0.0200000 0.9800000
## [20,] 0.8461538 0.1538462
## [21,] 0.2352941 0.7647059
## [22,] 0.2352941 0.7647059
## [23,] 0.2352941 0.7647059
## [24,] 0.8461538 0.1538462
## [25,] 0.8461538 0.1538462
## [26,] 0.2352941 0.7647059
## [27,] 0.0200000 0.9800000
## [28,] 0.0200000 0.9800000
## [29,] 0.8461538 0.1538462
## [30,] 0.0200000 0.9800000
## [31,] 0.0200000 0.9800000
## [32,] 0.2352941 0.7647059
## [33,] 0.8461538 0.1538462
## [34,] 0.8461538 0.1538462
## [35,] 0.0200000 0.9800000
## [36,] 0.8461538 0.1538462
## [37,] 0.0200000 0.9800000
## [38,] 0.0200000 0.9800000
## [39,] 0.0200000 0.9800000
## [40,] 0.0200000 0.9800000
## [41,] 0.8461538 0.1538462
## [42,] 0.2352941 0.7647059
## [43,] 0.0200000 0.9800000
## [44,] 0.0200000 0.9800000
## [45,] 0.0200000 0.9800000
## [46,] 0.8461538 0.1538462
## [47,] 0.8461538 0.1538462
## [48,] 0.2352941 0.7647059
## [49,] 0.0200000 0.9800000
## [50,] 0.0200000 0.9800000
## [51,] 0.0200000 0.9800000
## [52,] 0.0200000 0.9800000
## [53,] 0.0200000 0.9800000
## [54,] 0.0200000 0.9800000
## [55,] 0.0200000 0.9800000
## [56,] 0.0200000 0.9800000
## [57,] 0.0200000 0.9800000
## [58,] 0.0200000 0.9800000
## [59,] 0.0200000 0.9800000
## [60,] 0.0200000 0.9800000
## [61,] 0.0200000 0.9800000
## [62,] 0.0200000 0.9800000
## [63,] 0.2352941 0.7647059
## [64,] 0.0200000 0.9800000
## [65,] 0.0200000 0.9800000
## [66,] 0.0200000 0.9800000
## [67,] 0.0200000 0.9800000
## [68,] 0.0200000 0.9800000
## [69,] 0.0200000 0.9800000
## [70,] 0.2352941 0.7647059
## [71,] 0.0200000 0.9800000
## [72,] 0.0200000 0.9800000
## [73,] 0.0200000 0.9800000
## [74,] 0.0200000 0.9800000
## [75,] 0.0200000 0.9800000
## [76,] 0.2352941 0.7647059
## [77,] 0.0200000 0.9800000
## [78,] 0.0200000 0.9800000
## [79,] 0.0200000 0.9800000
## [80,] 0.0200000 0.9800000
## [81,] 0.0200000 0.9800000
## [82,] 0.0200000 0.9800000
## [83,] 0.0200000 0.9800000
## [84,] 0.0200000 0.9800000
## [85,] 0.0200000 0.9800000
## [86,] 0.0200000 0.9800000
## [87,] 0.0200000 0.9800000
## [88,] 0.0200000 0.9800000
## [89,] 0.0200000 0.9800000
## [90,] 0.0200000 0.9800000
## [91,] 0.2352941 0.7647059
## [92,] 0.0200000 0.9800000
## [93,] 0.0200000 0.9800000
## [94,] 0.0200000 0.9800000
## [95,] 0.0200000 0.9800000
## [96,] 0.0200000 0.9800000
## [97,] 0.0200000 0.9800000
## [98,] 0.2352941 0.7647059
## [99,] 0.0200000 0.9800000
## [100,] 0.0200000 0.9800000
## [101,] 0.0200000 0.9800000
## [102,] 0.0200000 0.9800000
## [103,] 0.0200000 0.9800000
## [104,] 0.8461538 0.1538462
## [105,] 0.2352941 0.7647059
## [106,] 0.0200000 0.9800000
## [107,] 0.0200000 0.9800000
## [108,] 0.0200000 0.9800000
## [109,] 0.0200000 0.9800000
## [110,] 0.0200000 0.9800000
## [111,] 0.0200000 0.9800000
## [112,] 0.0200000 0.9800000
## [113,] 0.0200000 0.9800000
## [114,] 0.0200000 0.9800000
## [115,] 0.0200000 0.9800000
## [116,] 0.0200000 0.9800000
## [117,] 0.0200000 0.9800000
## [118,] 0.0200000 0.9800000
## [119,] 0.0200000 0.9800000
## [120,] 0.0200000 0.9800000
## [121,] 0.0200000 0.9800000
## [122,] 0.0200000 0.9800000
## [123,] 0.0200000 0.9800000
## [124,] 0.0200000 0.9800000
## [125,] 0.0200000 0.9800000
## [126,] 0.0200000 0.9800000
## [127,] 0.0200000 0.9800000
## [128,] 0.0200000 0.9800000
## [129,] 0.8461538 0.1538462
## [130,] 0.0200000 0.9800000
prob = data.frame(tr, quest.df)
prob
## X1 X2 X redcap aim phq_dry_swallow phq_dry_qol
## 1 0.0200000 0.9800000 1 17 1 0 0
## 2 0.0200000 0.9800000 29 18 1 0 0
## 3 0.0200000 0.9800000 57 23 1 19 16
## 4 0.0200000 0.9800000 81 36 1 1 1
## 5 0.0200000 0.9800000 109 46 1 29 18
## 6 0.0200000 0.9800000 137 49 1 0 1
## 7 0.0200000 0.9800000 161 63 1 0 0
## 8 0.8461538 0.1538462 193 66 3 99 100
## 9 0.0200000 0.9800000 221 136 1 0 0
## 10 0.0200000 0.9800000 249 143 1 0 0
## 11 0.2352941 0.7647059 277 170 1 50 50
## 12 0.0200000 0.9800000 305 171 1 2 0
## 13 0.0200000 0.9800000 333 175 1 5 5
## 14 0.2352941 0.7647059 361 178 3 31 29
## 15 0.0200000 0.9800000 389 181 1 3 0
## 16 0.2352941 0.7647059 417 183 1 50 50
## 17 0.0200000 0.9800000 445 190 1 0 3
## 18 0.2352941 0.7647059 473 200 3 35 30
## 19 0.0200000 0.9800000 501 202 1 1 0
## 20 0.8461538 0.1538462 529 218 3 65 65
## 21 0.2352941 0.7647059 557 219 3 50 50
## 22 0.2352941 0.7647059 585 221 3 50 50
## 23 0.2352941 0.7647059 611 226 3 50 50
## 24 0.8461538 0.1538462 635 236 3 76 71
## 25 0.8461538 0.1538462 666 239 3 100 100
## 26 0.2352941 0.7647059 694 247 3 50 50
## 27 0.0200000 0.9800000 721 248 3 40 48
## 28 0.0200000 0.9800000 749 254 1 7 5
## 29 0.8461538 0.1538462 775 266 3 75 65
## 30 0.0200000 0.9800000 799 270 2 6 11
## 31 0.0200000 0.9800000 827 274 1 0 1
## 32 0.2352941 0.7647059 849 277 1 50 50
## 33 0.8461538 0.1538462 877 279 3 51 90
## 34 0.8461538 0.1538462 906 280 3 30 58
## 35 0.0200000 0.9800000 931 281 2 0 0
## 36 0.8461538 0.1538462 959 282 3 90 90
## 37 0.0200000 0.9800000 985 283 3 15 5
## 38 0.0200000 0.9800000 1013 288 2 2 0
## 39 0.0200000 0.9800000 1037 289 1 0 0
## 40 0.0200000 0.9800000 1065 294 1 0 0
## 41 0.8461538 0.1538462 1094 295 3 71 73
## 42 0.2352941 0.7647059 1124 297 3 80 47
## 43 0.0200000 0.9800000 1148 298 1 4 2
## 44 0.0200000 0.9800000 1176 299 1 0 0
## 45 0.0200000 0.9800000 1208 300 1 1 3
## 46 0.8461538 0.1538462 1236 303 3 73 64
## 47 0.8461538 0.1538462 1257 307 3 97 98
## 48 0.2352941 0.7647059 1284 308 1 15 14
## 49 0.0200000 0.9800000 1312 309 1 2 0
## 50 0.0200000 0.9800000 1338 310 1 3 2
## 51 0.0200000 0.9800000 1366 313 1 3 3
## 52 0.0200000 0.9800000 1394 314 1 0 0
## 53 0.0200000 0.9800000 1422 315 2 3 3
## 54 0.0200000 0.9800000 1450 317 1 0 0
## 55 0.0200000 0.9800000 1478 318 1 25 0
## 56 0.0200000 0.9800000 1506 319 1 10 0
## 57 0.0200000 0.9800000 1530 321 1 15 0
## 58 0.0200000 0.9800000 1558 323 2 0 0
## 59 0.0200000 0.9800000 1584 324 1 3 1
## 60 0.0200000 0.9800000 1612 328 2 2 3
## 61 0.0200000 0.9800000 1642 329 2 1 0
## 62 0.0200000 0.9800000 1671 330 1 8 12
## 63 0.2352941 0.7647059 1703 331 3 43 56
## 64 0.0200000 0.9800000 1730 336 2 60 7
## 65 0.0200000 0.9800000 1758 337 1 8 10
## 66 0.0200000 0.9800000 1786 339 2 0 2
## 67 0.0200000 0.9800000 1818 340 1 1 3
## 68 0.0200000 0.9800000 1846 341 2 3 0
## 69 0.0200000 0.9800000 1873 342 2 14 7
## 70 0.2352941 0.7647059 1901 343 2 50 50
## 71 0.0200000 0.9800000 1929 344 2 0 2
## 72 0.0200000 0.9800000 1960 345 2 0 0
## 73 0.0200000 0.9800000 1989 346 2 20 24
## 74 0.0200000 0.9800000 2019 347 1 0 0
## 75 0.0200000 0.9800000 2047 348 1 0 2
## 76 0.2352941 0.7647059 2072 350 1 1 49
## 77 0.0200000 0.9800000 2100 352 2 1 0
## 78 0.0200000 0.9800000 2128 353 1 0 0
## 79 0.0200000 0.9800000 2156 354 2 0 0
## 80 0.0200000 0.9800000 2184 356 1 0 0
## 81 0.0200000 0.9800000 2212 358 2 0 0
## 82 0.0200000 0.9800000 2244 359 2 0 0
## 83 0.0200000 0.9800000 2272 361 1 0 0
## 84 0.0200000 0.9800000 2300 366 1 0 0
## 85 0.0200000 0.9800000 2328 367 2 0 0
## 86 0.0200000 0.9800000 2356 369 2 0 0
## 87 0.0200000 0.9800000 2384 371 2 12 6
## 88 0.0200000 0.9800000 2412 372 2 51 0
## 89 0.0200000 0.9800000 2440 376 1 0 0
## 90 0.0200000 0.9800000 2466 377 2 0 1
## 91 0.2352941 0.7647059 2492 378 3 74 10
## 92 0.0200000 0.9800000 2520 379 1 1 3
## 93 0.0200000 0.9800000 2547 380 1 5 0
## 94 0.0200000 0.9800000 2578 381 1 0 0
## 95 0.0200000 0.9800000 2602 382 1 7 4
## 96 0.0200000 0.9800000 2631 383 1 0 0
## 97 0.0200000 0.9800000 2657 384 1 0 0
## 98 0.2352941 0.7647059 2685 385 3 49 24
## 99 0.0200000 0.9800000 2708 386 1 3 2
## 100 0.0200000 0.9800000 2736 387 1 0 0
## 101 0.0200000 0.9800000 2764 389 1 0 0
## 102 0.0200000 0.9800000 2790 390 1 0 0
## 103 0.0200000 0.9800000 2818 391 1 0 0
## 104 0.8461538 0.1538462 2850 393 3 70 78
## 105 0.2352941 0.7647059 2876 395 1 0 0
## 106 0.0200000 0.9800000 2904 396 1 1 0
## 107 0.0200000 0.9800000 2936 398 1 0 0
## 108 0.0200000 0.9800000 2964 402 1 15 7
## 109 0.0200000 0.9800000 2992 404 1 2 0
## 110 0.0200000 0.9800000 3020 405 1 0 0
## 111 0.0200000 0.9800000 3048 412 1 0 0
## 112 0.0200000 0.9800000 3078 413 1 3 0
## 113 0.0200000 0.9800000 3109 415 1 0 0
## 114 0.0200000 0.9800000 3137 416 1 0 0
## 115 0.0200000 0.9800000 3163 418 1 0 1
## 116 0.0200000 0.9800000 3189 419 2 0 0
## 117 0.0200000 0.9800000 3217 420 2 0 0
## 118 0.0200000 0.9800000 3246 421 1 0 0
## 119 0.0200000 0.9800000 3272 422 2 0 0
## 120 0.0200000 0.9800000 3299 423 2 0 0
## 121 0.0200000 0.9800000 3326 427 1 0 0
## 122 0.0200000 0.9800000 3354 428 1 0 0
## 123 0.0200000 0.9800000 3382 429 2 0 0
## 124 0.0200000 0.9800000 3407 430 1 4 0
## 125 0.0200000 0.9800000 3435 433 1 1 4
## 126 0.0200000 0.9800000 3463 435 2 50 50
## 127 0.0200000 0.9800000 3493 436 2 8 0
## 128 0.0200000 0.9800000 3524 437 2 0 0
## 129 0.8461538 0.1538462 3552 438 2 71 71
## 130 0.0200000 0.9800000 3580 439 1 5 3
## phq_dry_speak phq_dry_throat phq_dry_tongue phq_dry_lips phq_dry_palate
## 1 0 0 0 8 5
## 2 0 5 0 10 0
## 3 16 25 22 35 29
## 4 1 1 1 3 1
## 5 29 48 28 56 62
## 6 0 0 0 0 0
## 7 0 0 0 27 0
## 8 99 75 90 95 95
## 9 0 0 0 10 0
## 10 0 0 0 0 0
## 11 50 50 50 50 50
## 12 0 0 0 0 0
## 13 4 5 4 4 5
## 14 17 21 36 50 33
## 15 0 5 7 57 16
## 16 50 50 50 50 50
## 17 2 4 2 42 44
## 18 40 55 40 50 55
## 19 0 0 0 22 9
## 20 75 75 75 75 75
## 21 50 1 71 73 50
## 22 50 50 50 50 50
## 23 50 50 50 50 50
## 24 47 69 75 82 80
## 25 86 95 100 93 100
## 26 50 95 95 100 95
## 27 51 67 0 67 69
## 28 8 8 9 16 10
## 29 60 80 60 85 55
## 30 5 19 2 15 13
## 31 0 0 0 16 0
## 32 50 50 50 50 50
## 33 85 50 51 100 51
## 34 58 80 80 80 78
## 35 0 0 0 7 10
## 36 95 15 90 85 90
## 37 25 11 4 66 16
## 38 3 2 2 2 2
## 39 0 0 0 0 0
## 40 0 0 0 10 0
## 41 73 50 48 71 53
## 42 75 86 72 61 70
## 43 2 4 4 3 1
## 44 0 0 0 0 0
## 45 1 1 0 1 2
## 46 73 64 82 88 76
## 47 99 50 51 50 52
## 48 0 49 49 62 49
## 49 0 0 0 24 0
## 50 0 3 0 0 1
## 51 6 6 6 7 1
## 52 0 0 0 23 0
## 53 0 2 1 2 0
## 54 0 0 0 24 0
## 55 25 0 0 25 0
## 56 2 22 12 24 10
## 57 15 0 0 15 2
## 58 0 0 0 3 0
## 59 3 0 0 31 3
## 60 2 4 2 3 2
## 61 0 6 10 34 3
## 62 6 9 10 28 7
## 63 55 51 46 61 49
## 64 7 20 20 52 33
## 65 6 12 8 53 11
## 66 3 2 0 0 0
## 67 3 3 11 4 6
## 68 0 0 0 0 0
## 69 2 6 7 11 9
## 70 50 50 50 50 1
## 71 1 2 2 3 1
## 72 0 0 0 11 0
## 73 19 6 15 49 50
## 74 0 0 0 0 0
## 75 1 0 0 5 0
## 76 50 48 50 49 50
## 77 0 16 3 10 0
## 78 0 2 0 0 0
## 79 0 0 0 0 0
## 80 3 3 0 0 0
## 81 0 17 15 21 8
## 82 0 20 0 34 0
## 83 0 16 11 29 26
## 84 0 0 0 1 0
## 85 8 0 0 22 0
## 86 0 0 0 10 0
## 87 22 16 0 13 3
## 88 6 6 7 18 6
## 89 0 26 1 27 0
## 90 50 0 0 28 0
## 91 0 65 61 77 73
## 92 27 27 24 4 4
## 93 0 2 0 0 0
## 94 0 0 0 5 0
## 95 6 3 9 7 6
## 96 0 0 0 49 0
## 97 0 0 0 25 0
## 98 24 51 49 49 50
## 99 0 0 0 0 0
## 100 0 0 0 0 0
## 101 0 0 0 11 0
## 102 0 0 11 26 0
## 103 0 0 0 0 0
## 104 85 80 82 83 85
## 105 0 50 50 50 50
## 106 0 0 0 0 0
## 107 0 0 0 71 0
## 108 8 6 8 25 4
## 109 2 0 3 8 0
## 110 0 2 3 50 1
## 111 0 0 0 50 0
## 112 1 0 0 24 0
## 113 0 0 0 11 0
## 114 0 0 0 0 0
## 115 21 26 18 49 7
## 116 0 0 0 0 0
## 117 0 13 15 29 29
## 118 0 0 0 20 0
## 119 0 10 0 20 3
## 120 0 0 1 2 3
## 121 0 0 0 0 0
## 122 0 0 20 66 22
## 123 1 0 0 0 0
## 124 0 0 1 0 1
## 125 3 3 1 49 3
## 126 50 8 4 8 7
## 127 0 0 0 8 0
## 128 0 0 0 0 0
## 129 67 68 78 91 62
## 130 6 3 5 22 4
## uwsfr cohort cat
## 1 0.611 Control not.low
## 2 0.410 Control not.low
## 3 0.301 Control not.low
## 4 0.511 Control not.low
## 5 0.334 Control not.low
## 6 0.562 Control not.low
## 7 0.422 Control not.low
## 8 0.024 Sjögren's low
## 9 1.107 Control not.low
## 10 0.502 Control not.low
## 11 0.789 Control not.low
## 12 0.745 Control not.low
## 13 0.311 Control not.low
## 14 0.000 Sjögren's low
## 15 0.751 Control not.low
## 16 0.413 Control not.low
## 17 0.119 Control not.low
## 18 0.232 Sjögren's not.low
## 19 0.842 Control not.low
## 20 0.676 Sjögren's not.low
## 21 0.000 Sjögren's low
## 22 0.077 Sjögren's low
## 23 0.114 Sjögren's not.low
## 24 0.083 Sjögren's low
## 25 0.000 Sjögren's low
## 26 0.056 Sjögren's low
## 27 0.142 Sjögren's not.low
## 28 0.593 Control not.low
## 29 0.050 Sjögren's low
## 30 0.721 Control not.low
## 31 0.340 Control not.low
## 32 0.391 Control not.low
## 33 0.065 Sjögren's low
## 34 0.000 Sjögren's low
## 35 0.252 Control not.low
## 36 0.010 Sjögren's low
## 37 0.492 Sjögren's not.low
## 38 0.187 Control not.low
## 39 0.340 Control not.low
## 40 0.364 Control not.low
## 41 0.000 Sjögren's low
## 42 0.137 Sjögren's not.low
## 43 0.383 Control not.low
## 44 1.796 Control not.low
## 45 1.065 Control not.low
## 46 0.001 Sjögren's low
## 47 0.052 Sjögren's low
## 48 0.193 Control not.low
## 49 0.684 Control not.low
## 50 0.314 Control not.low
## 51 0.341 Control not.low
## 52 0.852 Control not.low
## 53 0.356 Control not.low
## 54 0.414 Control not.low
## 55 0.409 Control not.low
## 56 0.106 Control not.low
## 57 0.318 Control not.low
## 58 0.115 Control not.low
## 59 0.097 Control low
## 60 0.234 Control not.low
## 61 0.383 Control not.low
## 62 0.250 Control not.low
## 63 0.261 Sjögren's not.low
## 64 0.368 Control not.low
## 65 0.412 Control not.low
## 66 0.294 Control not.low
## 67 0.247 Control not.low
## 68 0.386 Control not.low
## 69 0.260 Control not.low
## 70 0.148 Control not.low
## 71 0.193 Control not.low
## 72 0.605 Control not.low
## 73 0.028 Control low
## 74 0.420 Control not.low
## 75 0.549 Control not.low
## 76 0.705 Control not.low
## 77 0.708 Control not.low
## 78 0.181 Control not.low
## 79 1.108 Control not.low
## 80 0.268 Control not.low
## 81 0.614 Control not.low
## 82 0.809 Control not.low
## 83 0.438 Control not.low
## 84 0.189 Control not.low
## 85 0.399 Control not.low
## 86 0.738 Control not.low
## 87 0.270 Control not.low
## 88 0.178 Control not.low
## 89 0.475 Control not.low
## 90 0.464 Control not.low
## 91 0.292 Sjögren's not.low
## 92 0.423 Control not.low
## 93 0.399 Control not.low
## 94 0.579 Control not.low
## 95 0.199 Control not.low
## 96 0.517 Control not.low
## 97 0.264 Control not.low
## 98 0.150 Sjögren's not.low
## 99 0.709 Control not.low
## 100 0.263 Control not.low
## 101 0.559 Control not.low
## 102 0.435 Control not.low
## 103 0.326 Control not.low
## 104 0.017 Sjögren's low
## 105 0.232 Control not.low
## 106 0.198 Control not.low
## 107 0.756 Control not.low
## 108 0.313 Control not.low
## 109 0.180 Control not.low
## 110 0.475 Control not.low
## 111 1.156 Control not.low
## 112 0.698 Control not.low
## 113 0.757 Control not.low
## 114 0.442 Control not.low
## 115 0.316 Control not.low
## 116 0.127 Control not.low
## 117 0.507 Control not.low
## 118 0.470 Control not.low
## 119 0.404 Control not.low
## 120 0.412 Control not.low
## 121 0.677 Control not.low
## 122 0.283 Control not.low
## 123 0.242 Control not.low
## 124 0.267 Control not.low
## 125 0.660 Control not.low
## 126 0.691 Control not.low
## 127 0.183 Control not.low
## 128 0.427 Control not.low
## 129 0.273 Control not.low
## 130 0.539 Control not.low
model <- train(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat +
phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr,
data = quest.df, method='ctree', tuneLength=5,
trControl=trainControl(
method='cv', number=3, classProbs=TRUE, summaryFunction=twoClassSummary))
png("~/Dropbox/oral-clinical-data-analysis-ms/Figure5b.png", res=80, height=800, width=800)
plot(fit)
dev.off()
## quartz_off_screen
## 2
library(caret)
set.seed(3456)
trainIndex <- createDataPartition(quest.df$cat, p = .7,
list = FALSE,
times = 1)
train <- quest.df[ trainIndex,]
test <- quest.df[-trainIndex,]
library(caret)
tmodel = ctree(formula=cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat +
phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr,
data = train)
print(tmodel)
##
## Conditional inference tree with 3 terminal nodes
##
## Response: cat
## Inputs: phq_dry_swallow, phq_dry_qol, phq_dry_speak, phq_dry_throat, phq_dry_tongue, phq_dry_lips, phq_dry_palate, uwsfr
## Number of observations: 92
##
## 1) phq_dry_qol <= 50; criterion = 1, statistic = 49.092
## 2) phq_dry_lips <= 28; criterion = 0.996, statistic = 12.24
## 3)* weights = 60
## 2) phq_dry_lips > 28
## 4)* weights = 24
## 1) phq_dry_qol > 50
## 5)* weights = 8
plot(tmodel)
pred = predict(tmodel, test)
foo = data.frame(pred, test)
table(foo$cat, foo$pred)
##
## low not.low
## low 3 2
## not.low 3 30
Supplementary Figure 5: Random forest analysis also identifies dryness of lips and impacts on quality of life as the most discriminant features. Random forest analysis was used to identify the most discriminant features included in the Visual Analog Scale. The out of box error rate for the random forest model was 8.46%. The most discriminant features were the impact of dry mouth on the overall quality of life (phq_dry_qol), the dryness of the lips (phq_dry_lips) consistent with the classification tree.
library(randomForest)
library(rfPermute)
fit <- randomForest(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate + cohort, data=quest.df, proximity = TRUE)
print(fit) # view results
##
## Call:
## randomForest(formula = cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate + cohort, data = quest.df, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 8.46%
## Confusion matrix:
## low not.low class.error
## low 11 6 0.35294118
## not.low 5 108 0.04424779
importance(fit) # importance of each predictor
## MeanDecreaseGini
## phq_dry_swallow 2.630653
## phq_dry_qol 4.765104
## phq_dry_speak 3.450197
## phq_dry_throat 1.754508
## phq_dry_tongue 4.178521
## phq_dry_lips 4.300532
## phq_dry_palate 3.558803
## cohort 3.523465
imp = data.frame(caret::varImp(fit))
imp$factor = rownames(imp)
imp = imp[order(-imp$Overall),]
ordering = unique(imp$factor)
imp$factor = as.factor(as.character(imp$factor))
imp$factor <- factor(imp$factor, levels = ordering)
p = ggplot(imp, aes(factor, Overall)) + geom_point() + coord_flip() + theme_classic() +
xlab("Overall")
p
ggsave(p, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS5.png", device="png", height = 6, width = 8)
#For teeth 7, 8, 22, and 27 the error 0 or 1 probability occurred;
tooth.model = tooth.model[!(tooth.model %in% c(1, 16, 17, 32))]
caries.df <- dplyr::select(caries_map, c("DMFS.bytooth", "cohort", "age", "uwsfr", "tooth")) %>%
filter(., !(tooth %in% c(1, 16, 17, 32) ))
caries.df = subset(caries.df, cohort=="Control" & age >=40 | cohort=="Low Flow")
cohort = caries.df$cohort
tooth = caries.df$tooth
caries.df$cohort = NULL
caries.df.scaled = data.frame(scale(caries.df, center=TRUE, scale=TRUE))
caries.df.scaled$tooth = tooth
caries.df.scaled$cohort = cohort
#dmfs.model = 'DMFS.bytooth ~ cohort + uwsfr + age'
dmfs.model = 'DMFS.bytooth ~ cohort + uwsfr + cohort:uwsfr + age'
df = get_spltmap_out(df=caries.df.scaled,
myvector=as.vector(caries.df.scaled$tooth),
model=dmfs.model)
df = define_tooth_class(df)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#combine coefs and confs and filter out intercept
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the DMFS Model
p1 = plot_age(df, model="DMFS")
p2 = plot_cohort(df, model="DMFS")
p3 = plot_flow(df, model="DMFS")
p4 = plot_interaction(df, model="DMFS")
Figure3A <- ggarrange((p1 + theme(legend.position = "none")),
(p2 + theme(legend.position = "none")),
(p3 + theme(legend.position = "none")),
(p4 + theme(legend.position = "none")), ncol = 4)
Set up CAL model - over 40
perio.df <- dplyr::select(perio_map, c("cal", "pd","gm.cej","bop",
"cohort", "age", "uwsfr", "tooth")) %>%
filter(., tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
perio.df = subset(perio.df, cohort=="Control" & age >=40 | cohort=="Low Flow")
#transformations
#perio.df$cal = orderNorm(perio.df$cal)$x.t #normal
perio.df.scaled = data.frame(scale(perio.df, center=TRUE, scale=TRUE))
cal.model = 'cal_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df,
myvector=as.vector(perio.df$tooth),
model=cal.model)
df = define_tooth_class(mycoefficients)
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))
#Plot results for the CAL Model
p1 = plot_age(df, model="CAL")
p2 = plot_cohort(df, model="CAL")
p3 = plot_flow(df, model="CAL")
p4 = plot_interaction(df, model="CAL")
Figure3B <- ggarrange((p1 + theme(legend.position = "none")),
(p2 + theme(legend.position = "none")),
(p3 + theme(legend.position = "none")),
(p4 + theme(legend.position = "none")), ncol = 4)
Figure3B